---
title: "Subgroup analysis: education"
author: "[redacted]"
date: "2025-04-07"
output: html_document
---

```{r}
```

##### LOAD PACKAGES AND DATA

```{r}
#Load packages
library(tidyverse)
library(cjoint)
library(cregg)

# Read the Qualtrics export into a conjoint data set, keeping `education` as a
# respondent-level covariate, and immediately discard rows without a recorded
# choice (`selected` is NA).
conjoint_edu <- cjoint::read.qualtrics(
  "data_clean.csv",
  responses = paste0("conjoint_", c("intro", 1:8)),
  covariates = "education",
  respondentID = "ResponseId",
  letter = "F",
  new.format = TRUE
) %>%
  drop_na(selected)

# Print the column names so the attribute and ID columns used further down can
# be checked against what was actually read.
names(conjoint_edu)


```

# RECODE COVARIATE

```{r}

# Collapse `education` into the two-level subgroup factor `edu_cat`.
conjoint_edu <- conjoint_edu %>%
  mutate(
    edu_cat = case_when(
      # Missing or blank answers must stay missing. Without this branch the
      # catch-all below would silently count non-respondents as
      # "not university-educated" and the drop_na() step would never drop
      # anything.
      is.na(education) | as.character(education) == "" ~ NA_character_,
      # Levels 5, 6, 7 recoded as "university-educated"
      education %in% c(5, 6, 7) ~ "university-educated",
      # All other observed levels recoded as "not university-educated"
      TRUE ~ "not university-educated"
    ),
    # First level is listed first so it comes first wherever factor order
    # is used downstream.
    edu_cat = factor(
      edu_cat,
      levels = c("not university-educated", "university-educated")
    )
  ) %>%
  # Respondents with unknown education cannot be assigned to a subgroup.
  drop_na(edu_cat)

# Row counts per education group after recoding
summary(conjoint_edu$edu_cat)


```

# RENAME VARIABLES

```{r}

# Step 1: translate the Dutch attribute (column) names into English.
conjoint_edu <- conjoint_edu %>%
  dplyr::rename(
    Topic = Onderwerp,
    Relation.to.you = Relatie.tot.u,
    Apparent.knowledge.of.the.person = Schijnbare.kennis.van.de.persoon,
    Apparent.concern.of.the.person = Schijnbare.bezorgdheid.van.de.persoon,
    Apparent.motivation.of.the.person = Schijnbare.motivatie.van.de.persoon,
    Closeness.with.you = Band.met.u,
    Gender.of.the.person = Geslacht.van.de.persoon
  )

# Step 2: translate the Dutch attribute levels into English. Each attribute is
# recoded with recode_factor(), listing the old value on the left and the new
# English label on the right.
conjoint_edu <- conjoint_edu %>%
  mutate(
    Topic = recode_factor(
      Topic,
      "overstromingen" = "floods",
      "besmettelijke ziekten" = "infectious diseases",
      "pesticiden" = "pesticides",
      "levensstijlziekten" = "lifestyle diseases"
    ),
    Gender.of.the.person = recode_factor(
      Gender.of.the.person,
      "vrouw" = "female",
      "man" = "male"
    ),
    Relation.to.you = recode_factor(
      Relation.to.you,
      "vriend/vriendin (platonisch)" = "friend",
      "collega/studiegenoot" = "colleague/fellow student",
      "familielid" = "relative"
    ),
    Closeness.with.you = recode_factor(
      Closeness.with.you,
      "zeer hecht" = "very close",
      "redelijk hecht" = "fairly close",
      "enigszins hecht" = "somewhat close",
      "niet hecht" = "not close"
    ),
    Apparent.knowledge.of.the.person = recode_factor(
      Apparent.knowledge.of.the.person,
      "zeer goed geinformeerd" = "very well-informed",
      "redelijk goed geinformeerd" = "fairly well-informed",
      "enigszins geinformeerd" = "somewhat well-informed",
      "niet geinformeerd" = "not well-informed"
    ),
    Apparent.concern.of.the.person = recode_factor(
      Apparent.concern.of.the.person,
      "zeer bezorgd" = "very concerned",
      "redelijk bezorgd" = "fairly concerned",
      "enigszins bezorgd" = "somewhat concerned",
      "niet bezorgd" = "not concerned"
    ),
    Apparent.motivation.of.the.person = recode_factor(
      Apparent.motivation.of.the.person,
      "u te overtuigen om meer maatregelen te nemen" = "to convince you to take more precautions",
      "informatie over het risico met u te willen uitwisselen" = "to exchange information with you",
      "zich minder ongerust voelen" = "to feel less anxious",
      "u te overtuigen om minder maatregelen te nemen" = "to convince you to take fewer precautions"
    )
  )


# Sanity check: tabulate every recoded attribute. Any Dutch label still showing
# up here means its source string did not match the data exactly.
table(conjoint_edu$Topic)
table(conjoint_edu$Gender.of.the.person)
table(conjoint_edu$Relation.to.you)
table(conjoint_edu$Closeness.with.you)
table(conjoint_edu$Apparent.knowledge.of.the.person)
table(conjoint_edu$Apparent.concern.of.the.person)
table(conjoint_edu$Apparent.motivation.of.the.person)


```

# CHANGE VARIABLE ORDER AND SET REFERENCE CATEGORIES

```{r}

# Fix the level order of every attribute. The first level listed for each
# attribute becomes the first factor level (presumably the reference category
# for the AMCE models below; confirm against the cregg documentation).
# Values not listed in `levels` would become NA.
conjoint_edu <- conjoint_edu %>%
  mutate(
    Topic = factor(
      Topic,
      levels = c("pesticides", "floods", "lifestyle diseases", "infectious diseases")
    ),
    Gender.of.the.person = factor(
      Gender.of.the.person,
      levels = c("male", "female")
    ),
    Relation.to.you = factor(
      Relation.to.you,
      levels = c("colleague/fellow student", "friend", "relative")
    ),
    Closeness.with.you = factor(
      Closeness.with.you,
      levels = c("not close", "somewhat close", "fairly close", "very close")
    ),
    Apparent.knowledge.of.the.person = factor(
      Apparent.knowledge.of.the.person,
      levels = c("not well-informed", "somewhat well-informed", "fairly well-informed", "very well-informed")
    ),
    Apparent.concern.of.the.person = factor(
      Apparent.concern.of.the.person,
      levels = c("not concerned", "somewhat concerned", "fairly concerned", "very concerned")
    ),
    Apparent.motivation.of.the.person = factor(
      Apparent.motivation.of.the.person,
      levels = c("to convince you to take more precautions", "to convince you to take fewer precautions", "to feel less anxious", "to exchange information with you")
    )
  )


```

###### RUN MAIN AMCE, MM, AND DIFFERENCES IN MM

```{r}
### AMCE

# Full model: AMCEs for all seven attributes, estimated separately for each
# education group (`by = ~ edu_cat`).
amce_edu <- cj(
  conjoint_edu,
  selected ~
    Topic +
    Gender.of.the.person +
    Relation.to.you +
    Closeness.with.you +
    Apparent.knowledge.of.the.person +
    Apparent.concern.of.the.person +
    Apparent.motivation.of.the.person,
  by = ~ edu_cat,
  id = ~ Response.ID,
  estimate = "amce"
)

# Dodge width used to separate the two education groups on the same row
dodge_width <- 1

amce_edu_plot <- ggplot(
  amce_edu,
  aes(x = estimate, y = level, color = edu_cat, shape = edu_cat)
) +
  # Point estimates, offset per education group
  geom_point(size = 2, position = position_dodge(width = dodge_width)) +
  # Interval from `lower` to `upper`, offset to match the points
  geom_errorbarh(
    aes(xmin = lower, xmax = upper),
    height = 1,
    position = position_dodge(width = dodge_width)
  ) +
  # An AMCE of 0 means no effect relative to the reference level
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  scale_color_manual(values = c("cyan3", "blue4")) +
  # 16 = filled circle, 17 = filled triangle
  scale_shape_manual(values = c(16, 17)) +
  theme_minimal() +
  labs(
    x = "AMCE",
    y = "All attributes by education",
    color = "Education",
    shape = "Education"
  ) +
  theme(
    # White panel and plot backgrounds so the saved PNG is not transparent
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA)
  ) +
  xlim(-0.3, 0.3)

# ggsave() fails in a non-interactive knit when the target folder is missing,
# so make sure it exists first (no-op if it is already there).
dir.create("figures/education", recursive = TRUE, showWarnings = FALSE)
ggsave("figures/education/amce_edu.png", plot = amce_edu_plot, width = 8, height = 6, dpi = 300)


### MARGINAL MEANS

# Marginal means for all seven attributes, estimated separately for each
# education group (`by = ~ edu_cat`).
mm_edu <- cj(
  conjoint_edu,
  selected ~
    Topic +
    Gender.of.the.person +
    Relation.to.you +
    Closeness.with.you +
    Apparent.knowledge.of.the.person +
    Apparent.concern.of.the.person +
    Apparent.motivation.of.the.person,
  by = ~ edu_cat,
  id = ~ Response.ID,
  estimate = "mm"
)

# Put the attributes in presentation order
mm_edu$feature <- factor(
  mm_edu$feature,
  levels = c(
    "Topic",
    "Relation.to.you",
    "Apparent.knowledge.of.the.person",
    "Apparent.concern.of.the.person",
    "Apparent.motivation.of.the.person",
    "Closeness.with.you",
    "Gender.of.the.person"
  )
)

# Dodge width used to separate the two education groups on the same row
dodge_width <- 1

mm_edu_plot <- ggplot(
  mm_edu,
  aes(x = estimate, y = level, color = edu_cat, shape = edu_cat)
) +
  # Point estimates, offset per education group
  geom_point(size = 2, position = position_dodge(width = dodge_width)) +
  # Interval from `lower` to `upper`, offset to match the points
  geom_errorbarh(
    aes(xmin = lower, xmax = upper),
    height = 1,
    position = position_dodge(width = dodge_width)
  ) +
  # Reference line at 0.5: with a binary choice outcome a marginal mean of 0.5
  # means the level is chosen half of the time. (The line used to sit at 0,
  # which lies outside the x-axis limits below and was therefore never drawn.)
  geom_vline(xintercept = 0.5, linetype = "dashed", color = "grey") +
  scale_color_manual(values = c("cyan3", "blue4")) +
  # 16 = filled circle, 17 = filled triangle
  scale_shape_manual(values = c(16, 17)) +
  theme_minimal() +
  labs(
    x = "Marginal Means",
    y = "All attributes",
    color = "Education",
    shape = "Education"
  ) +
  theme(
    # White panel and plot backgrounds so the saved PNG is not transparent
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA)
  ) +
  xlim(0.3, 0.7)

# ggsave() fails in a non-interactive knit when the target folder is missing,
# so make sure it exists first (no-op if it is already there).
dir.create("figures/education", recursive = TRUE, showWarnings = FALSE)
ggsave("figures/education/mm_edu.png", plot = mm_edu_plot, width = 8, height = 6, dpi = 300)


### DIFFERENCES IN MARGINAL MEANS

# Differences in marginal means between the education groups for all seven
# attributes.
diff_edu <- cj(
  conjoint_edu,
  selected ~
    Topic +
    Gender.of.the.person +
    Relation.to.you +
    Closeness.with.you +
    Apparent.knowledge.of.the.person +
    Apparent.concern.of.the.person +
    Apparent.motivation.of.the.person,
  by = ~ edu_cat,
  id = ~ Response.ID,
  estimate = "mm_differences"
)

# Put the attributes in presentation order
diff_edu$feature <- factor(
  diff_edu$feature,
  levels = c(
    "Topic",
    "Relation.to.you",
    "Apparent.knowledge.of.the.person",
    "Apparent.concern.of.the.person",
    "Apparent.motivation.of.the.person",
    "Closeness.with.you",
    "Gender.of.the.person"
  )
)

# Dodge width (kept for consistency with the AMCE and marginal-means plots)
dodge_width <- 1

diff_edu_plot <- ggplot(
  diff_edu,
  aes(x = estimate, y = level, color = edu_cat, shape = edu_cat)
) +
  geom_point(size = 2, position = position_dodge(width = dodge_width)) +
  # Interval from `lower` to `upper`
  geom_errorbarh(
    aes(xmin = lower, xmax = upper),
    height = 1,
    position = position_dodge(width = dodge_width)
  ) +
  # Reference line at 0: a difference of 0 means both education groups have
  # the same marginal mean. (The line used to sit at 0.5, which lies outside
  # the x-axis limits below and was therefore never drawn.)
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
  # A single colour/shape is supplied: one value per group shown in the plot
  scale_color_manual(values = c("blue4")) +
  # 17 = filled triangle
  scale_shape_manual(values = c(17)) +
  theme_minimal() +
  labs(
    x = "Differences in Marginal Means",
    y = "All attributes",
    color = "Education",
    shape = "Education"
  ) +
  theme(
    # White panel and plot backgrounds so the saved PNG is not transparent
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA)
  ) +
  xlim(-0.15, 0.15)

# ggsave() fails in a non-interactive knit when the target folder is missing,
# so make sure it exists first (no-op if it is already there).
dir.create("figures/education", recursive = TRUE, showWarnings = FALSE)
ggsave("figures/education/diff_edu.png", plot = diff_edu_plot, width = 8, height = 6, dpi = 300)


```
##### DIAGNOSTICS

```{r}

# BALANCE TESTING

# Map edu_cat to a numeric 0/1 indicator
# ("not university-educated" = 0, "university-educated" = 1; anything else NA)
conjoint_edu$edu_cat_num <- ifelse(
  conjoint_edu$edu_cat == "not university-educated", 0,
  ifelse(conjoint_edu$edu_cat == "university-educated", 1, NA)
)

# Check for any NA values introduced by this process. table() drops NAs by
# default, so ask for them explicitly; otherwise this check could never show
# one.
table(conjoint_edu$edu_cat_num, useNA = "ifany")

# Marginal means of the education indicator by attribute level: the share of
# university-educated respondents who saw each level. The vertical line marks
# the overall share in the data.
# NOTE(review): xlim = c(0, 0.4) clips the plot if that overall share or any
# estimate exceeds 0.4; confirm the limits suit this covariate.
edubalance <- plot(
  mm(
    conjoint_edu,
    edu_cat_num ~
      Topic +
      Gender.of.the.person +
      Apparent.motivation.of.the.person +
      Apparent.concern.of.the.person +
      Relation.to.you +
      Closeness.with.you +
      Apparent.knowledge.of.the.person,
    id = ~Response.ID
  ),
  xlim = c(0, 0.4),
  vline = mean(conjoint_edu$edu_cat_num, na.rm = TRUE)
)

# Adjust the plot for larger text and dots
edubalance_adj <- edubalance +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  ggtitle("Balance Testing: Education") +
  labs(color = "Attribute") +  # Label for the color legend
  theme(
    text = element_text(size = 20),            # Base text size
    axis.title = element_text(size = 22),      # Axis title size
    axis.text = element_text(size = 18),       # Axis tick label size
    legend.title = element_text(size = 22),    # Legend title size
    legend.text = element_text(size = 18),     # Legend text size
    plot.title = element_text(size = 24, face = "bold"),  # Plot title size and style
    strip.text = element_text(size = 18)       # Facet label size (if facets are used)
  ) +
  guides(color = guide_legend(nrow = 4))  # Lay the legend out in 4 rows

# Save the plot. ggsave() fails in a non-interactive knit when the target
# folder is missing, so make sure it exists first (no-op if already there).
dir.create("diagnostics", recursive = TRUE, showWarnings = FALSE)
ggsave("diagnostics/balance_edu.png", plot = edubalance_adj, width = 15, height = 11, dpi = 300)


```

Note: none of the code chunks above sets `echo = FALSE`, so the R code is printed alongside its output in the rendered document. Add `echo = FALSE` to a chunk header to prevent printing of the R code that generated a plot.
